library(scales)
library(gridExtra)
library(ggplot2)

#### Functions ####

theme_bes <- function (base_size = 11, base_family = "") {
  theme_grey(base_size = base_size, base_family = base_family) %+replace% 
    theme(panel.background = element_rect(fill = "white", 
                                          colour = NA), 
          axis.line = element_line(colour = "grey20"),
          panel.grid.minor = element_blank(), 
          panel.grid.major = element_blank(),
          legend.key = element_rect(fill = "white", 
                                    colour = NA),
          axis.text=element_text(size=11),
          axis.title=element_text(size=11), 
          complete = FALSE)
}
dtf <- function(..., StAsFa= FALSE) {
  data.frame(..., stringsAsFactors = StAsFa)
}

cfSim <- function(n) {
  interest <- rnorm(n)
  noise <- rnorm(n)
  vals <- seq(-3, 3, 0.5)
  
  check <- function(bar) {
    responses <- (interest + noise) > bar
    
    out <- c(interest = mean(interest[responses]), rr=prop.table(table(responses))[2])
    return(out)
  }
  resp <- t(sapply(vals, check))
  resp[, 'interest'] <- resp[, 'interest'] - mean(interest) 
  out1 <- as.list(data.frame(resp))
  return(out1)
}



twoStageSim <- function(beta2, beta3, beta4, beta5) {
  n.sim = 10000
  prosoc <- rnorm(n.sim)
  interest <- rnorm(n.sim) + beta5 * prosoc
  interest <- as.vector(scale(interest))
  interestCheck <- function(stage1.bar, stage2.bar, beta2, beta3, beta4) {
    pass.1 <- (beta2 * prosoc)>stage1.bar
    pass.2 <- (beta3 * prosoc+beta4 * interest)> stage2.bar 
    mean.by.groups <- tapply(interest, list(stage1 = pass.1, stage2 = pass.2), mean)
    group.sizes <- table(pass.1, pass.2)
    group.weights <- mean.by.groups * group.sizes
    
    out <- list(interest.sample = mean(interest[pass.1 & pass.2]) - mean(interest), 
                rr = mean(pass.1&pass.2),
                contact.rate = mean(pass.1),
                coop.rate = mean(pass.2[pass.1==1]),
                mean.by.groups = round(mean.by.groups, 1), 
                group.sizes = group.sizes, 
                group.weights = round(group.weights, 2))
    return(out)
  }
  vals <- seq(-3, 3, 0.5)
  
  out <- list(change.screen = t(sapply(vals, interestCheck, stage2.bar=0, beta2=beta2, beta3 = beta3, beta4 = beta4)[1:4, ]) , 
              change.cooper = t(sapply(vals, interestCheck, stage1.bar=0, beta2=beta2, beta3 = beta3, beta4 = beta4)[1:4, ]))
  return(out)
}

createPlots <- function(beta2, beta3, beta4, beta5, n) {
  sims.twostage <- replicate(n, twoStageSim(beta2 = beta2, beta3 = beta3, beta4 = beta4, beta5 = beta5), simplify = F)  
  sim.change.coop <- dtf(rr = rowMeans(sapply(sims.twostage, function(x) unlist(x$change.cooper[, "rr"]))), 
                         interest = rowMeans(sapply(sims.twostage, function(x) unlist(x$change.cooper[, "interest.sample"]))))
  sim.change.contact <- dtf(rr = rowMeans(sapply(sims.twostage, function(x) unlist(x$change.screen[, "rr"]))), 
                            interest = rowMeans(sapply(sims.twostage, function(x) unlist(x$change.screen[, "interest.sample"]))))
  
  sim.change.coop <- dtf(rr = rowMeans(sapply(sims.twostage, function(x) unlist(x$change.cooper[, "rr"]))), 
                         interest = rowMeans(sapply(sims.twostage, function(x) unlist(x$change.cooper[, "interest.sample"]))))
  
  all.vals <- c(sim.change.coop$interest, sim.change.contact$interest)
  all.vals <- na.omit(all.vals)
  change.coop.plot <- ggplot(sim.change.coop, aes(x = rr, y = interest)) + geom_point() + geom_line() + 
    ylab("Overrepresentation of high interest respondents (z-score)") + theme_bes() + 
    xlab("Response rate") + scale_x_continuous(labels = percent) + 
    geom_vline(xintercept = sim.change.contact$rr[7], linetype = 2)+
    scale_y_continuous(limits = c(min(all.vals), 
                                  max(all.vals))) + ggtitle("RR change from changing cooperation threshold")
  
  
  change.contact.plot <- ggplot(sim.change.contact, aes(x = rr, y = interest)) + geom_point() + geom_line() + 
    ylab("Overrepresentation of high interest respondents (z-score)") + theme_bes() + 
    xlab("Response rate") + scale_x_continuous(labels = percent) +
    geom_vline(xintercept = sim.change.contact$rr[7], linetype = 2)+
    scale_y_continuous(limits = c(min(all.vals), 
                                  max(all.vals))) + 
    ggtitle("RR change from changing contact threshold")
  comb.plot <- arrangeGrob(change.contact.plot, change.coop.plot, ncol = 2)
  out <- list(contact.change = list(plot = change.contact.plot, table = sim.change.contact), 
              coop.change = list(plot = change.coop.plot, 
                                 table = sim.change.coop), 
              comb.plot = comb.plot)
}

#### Appendix Figure 1 ####
set.seed(2393)
sim <- replicate(1000, cfSim(n = 10000), simplify = F)
cf.model <- data.frame(interest = rowMeans(sapply(sim, function(x) x$interest)),
                rr = rowMeans(sapply(sim, function(x) x$rr))  )

app.fig.1 <- ggplot(cf.model, aes(x = rr, y = interest)) + geom_point() + geom_line() + 
  ylab("Overrepresentation of high interest respondents (SD)") + theme_bes() + 
  xlab("Response rate") + scale_x_continuous(labels = percent)
pdf("Figures/Figure_A1.pdf", width=8, height=6) 
plot(app.fig.1)
dev.off()

set.seed(4646)

#### Appendix Figure 2 ####
plots.basic <- createPlots(n = 1000, beta2 = 1, beta3 = 1, beta4 = 1, beta5= 0)

pdf("Figures/Figure_A2.pdf", width=12, height=6) 
plot(plots.basic$comb.plot)
dev.off()

#### Appendix Figure 3 ####
plots.neg.cor <- createPlots(n = 1000, beta2 = 1, beta3 = 1, beta4 = 1, beta5= -1)

pdf("Figures/Figure_A3.pdf", width=12, height=6) 
plot(plots.neg.cor$comb.plot)
dev.off()